Plotting the Correlation between Air Quality and Weather

In [1]:
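# If done right, this program should plot a chosen weather variable against a chosen air-quality variable and report their Pearson correlation.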
# Shoutout to my bois at StackOverflow - you da real MVPs

import numpy as np
import pandas as pd
import scipy.stats
from bokeh.io import show
from bokeh.models import HoverTool, Label
from bokeh.plotting import figure

# --- Interactive data loading ------------------------------------------------
# The cell asks four questions (weather file, weather series, air-quality file,
# air-quality series) and leaves the chosen arrays in `w_used` and `aq_used`.

try:
    read_line = raw_input  # Python 2: plain input() would eval() the typed text
except NameError:
    read_line = input  # Python 3: input() already returns the raw text


def _prompt_path(prompt):
    """Ask for a file path; surrounding whitespace and quotes are stripped.

    Stripping quotes keeps replies such as 'chs_os_weather.csv' (typed with
    quotes, as the evaluating Python 2 input() required) working unchanged.
    """
    return read_line(prompt).strip().strip("'\"")


def _prompt_choice(prompt, choices):
    """Ask until the reply is a key of ``choices``; return the mapped value.

    This replaces eval() on the typed text: only the listed names are
    accepted, so arbitrary expressions can no longer be executed.
    """
    while True:
        reply = read_line(prompt).strip().strip("'\"")
        if reply in choices:
            return choices[reply]
        print("Please enter one of: " + ", ".join(sorted(choices)))


weatherfile = _prompt_path("Which weather file would you like to use? ")
df = pd.read_csv(weatherfile)

# Columns are selected by position, not by name.  `.iloc[...].values` yields
# the same arrays the removed `DataFrame.as_matrix(columns=...)` call produced.
temp = df.iloc[:, 3:4].values.ravel()
humidity = df.iloc[:, 4:5].values.ravel()
pressure = df.iloc[:, 5:].values.ravel()  # every column from position 5 onward

unix_timeweather = df.iloc[:, 2:3].values  # kept as an N x 1 column

w_used = _prompt_choice(
    "Which data set do you want? temp, humidity, or pressure? ",
    {"temp": temp, "humidity": humidity, "pressure": pressure},
)

aqfile = _prompt_path("Which air quality file would you like to use? ")
df2 = pd.read_csv(aqfile)

# These stay two-dimensional (N x 1 for a single column), as before.
PM25 = df2.iloc[:, 4:5].values
PM1 = df2.iloc[:, 3:4].values
PM10 = df2.iloc[:, 5:].values  # every column from position 5 onward

unix_timeaq = df2.iloc[:, 2:3].values

aq_used = _prompt_choice(
    "Which data set do you want? PM1, PM25, or PM10? ",
    {"PM1": PM1, "PM25": PM25, "PM10": PM10},
)


def find_nearest(array, value, tolerance=30):
    """Return the index of the entry of ``array`` closest to ``value``.

    Parameters
    ----------
    array : array-like
        Candidate values.  The input is flattened before searching, so an
        N x 1 column behaves exactly like a 1-D sequence of length N.
    value : scalar or one-element array
        Target to match; only its first element is used.
    tolerance : number, optional
        Largest absolute difference that still counts as a match
        (inclusive).  Defaults to 30, the cut-off previously hard-coded.

    Returns
    -------
    integer index or None
        Position of the closest entry, or None when ``array`` or ``value``
        is empty, when every difference is NaN, or when the closest entry is
        further than ``tolerance`` away.
    """
    candidates = np.asarray(array).ravel()
    target = np.ravel(value)
    if candidates.size == 0 or target.size == 0:
        return None  # nothing to compare; argmin would raise on empty input

    distances = np.abs(candidates - target[0])
    if np.all(np.isnan(distances)):
        return None  # nanargmin raises when there is no non-NaN entry

    # nanargmin skips NaN entries; plain argmin would return the first NaN
    # and hide a perfectly valid nearby match.
    idx = np.nanargmin(distances)
    if distances[idx] <= tolerance:
        # print str(value) + "Vs" + str(array[idx])
        return idx
    return None


def make_usable(array1, array):
    """Delete, in place, every position where either sequence holds NaN.

    Both arguments are modified directly and nothing is returned.  They must
    support ``del seq[i]`` (for example Python lists; NumPy arrays do not).
    Positions are taken from ``array1``'s length, so ``array`` is expected to
    be at least as long.

    Parameters
    ----------
    array1 : mutable sequence of numbers
    array : mutable sequence of numbers
        Paired with ``array1`` position by position.
    """
    # Walk from the last index down to 0 (inclusive) so that deleting an
    # element never shifts the positions that are still to be examined.
    for i in range(len(array1) - 1, -1, -1):
        if np.isnan(array[i]) or np.isnan(array1[i]):
            del array[i]
            del array1[i]


# Pair every air-quality sample with the weather sample whose timestamp is
# nearest to it.  The two lists stay index-aligned: weatherarr[k] and aqarr[k]
# always belong to the same matched pair.
weatherarr = []
aqarr = []

for i in range(len(aq_used)):
    nearest_time = find_nearest(unix_timeweather, unix_timeaq[i])

    if nearest_time is None:
        # No weather timestamp is close enough; drop this air-quality sample.
        continue

    # The selected series may be 1-D or an N x 1 column, so pull the single
    # reading out of each entry to keep the lists flat.
    weatherarr.append(np.ravel(w_used[nearest_time])[0])
    aqarr.append(np.ravel(aq_used[i])[0])

# Remove pairs in which either reading is NaN.
# NOTE(review): assumes this is what make_usable was written for - confirm.
make_usable(weatherarr, aqarr)

# Plot the arrays #####################################################################


# Scatter plot of the matched pairs (weather on x, air quality on y) with the
# Pearson correlation shown in a boxed label.
hoverp = HoverTool(tooltips=[("(x,y)", "($x, $y)")])

p = figure(tools = [hoverp])

# pearsonr cannot be computed from fewer than two pairs, so show a placeholder
# instead of failing before anything is drawn.
if len(weatherarr) >= 2:
    pearson_text = str(scipy.stats.pearsonr(weatherarr, aqarr))
else:
    pearson_text = "n/a (fewer than two matched samples)"

# NOTE(review): render_mode='css' was dropped because newer Bokeh releases
# reportedly no longer accept it on Label - confirm against the installed
# version.  The default rendering still draws the border and background.
correlation = Label(x=50, y=50, x_units='screen', y_units='screen',
                    text="Pearson r and p: " + pearson_text,
                    border_line_color='black', border_line_alpha=1.0,
                    background_fill_color='white', background_fill_alpha=1.0)

p.add_layout(correlation)
p.scatter(x = weatherarr, y = aqarr, color = "firebrick")

show(p)


Which weather file would you like to use? 'chs_os_weather.csv'
Which data set do you want? temp, humidity, or pressure? temp
Which air quality file would you like to use? 'chs_os_aq.csv'
Which data set do you want? PM1, PM25, or PM10? PM10